# ============================================================================
# Replication Script for the US Original Survey (Appendices)
# Gender Differences in Immigration Attitudes in the U.S.
# Starting from us_replication_data.RData
# ============================================================================

# Load necessary libraries
library(plyr)       
library(tidyverse)  
library(broom)      
library(ggrepel)   
library(ggeffects)  
library(scales)     
library(stargazer)  
library(forcats)    

# ============================================================================
# LOAD REPLICATION DATA
# ============================================================================

# Load the replication dataset
load("us_replication_data.RData")

# ============================================================================
# APPENDIX J2 & J3: American Women's Feelings (Combined Figure) - FIXED
# ============================================================================

# Convert conditions to factor with proper levels
replication_data$conditions <- factor(replication_data$conditions,
                                      levels = c("Control",
                                                 "Treat1. High-skilled Immigration",
                                                 "Treat2. Low-skilled Immigration"))
replication_data$conditions <- relevel(replication_data$conditions, ref = "Control")

# Regression Models
lm.us.w <- lm(ft_wom_r ~ conditions:job.lowskill:female +
                conditions + job.lowskill + female +
                inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                econ_notsat_5_r + econ_own_notsat_5_r +
                race_5 + immi.first +
                job_conf_5_r + 
                pid_REP,
              data = replication_data)
lm.us.m <- lm(ft_men_r ~ conditions:job.lowskill:female +
                conditions + job.lowskill + female +
                inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                econ_notsat_5_r + econ_own_notsat_5_r +
                race_5 + immi.first +
                job_conf_5_r + 
                pid_REP,
              data = replication_data)


# Generate predicted values for immigrant women outcome
predictions_women <- ggpredict(lm.us.w, terms = c("conditions", "job.lowskill", "female")) %>%
  filter(facet == 1) %>%  # American women only
  mutate(outcome = "Feelings toward Immigrant Women") %>%
  filter(!is.na(x)) # Extra step to filter out NAs created by ggpredict()

# Generate predicted values for immigrant men outcome  
predictions_men <- ggpredict(lm.us.m, terms = c("conditions", "job.lowskill", "female")) %>%
  filter(facet == 1) %>%  # American women only
  mutate(outcome = "Feelings toward Immigrants Men") %>%
  filter(!is.na(x)) # Extra step to filter out NAs created by ggpredict()

# Combine predictions from both models for comparative visualization
marginal_effects <- bind_rows(predictions_women, predictions_men) 

# Set factor levels for proper ordering in plot
marginal_effects$x <- factor(marginal_effects$x, levels = c("Control", 
                                                            "Treat1. High-skilled Immigration", 
                                                            "Treat2. Low-skilled Immigration"))

# Set outcome factor levels to match the correct legend order
marginal_effects$outcome <- factor(marginal_effects$outcome, 
                                   levels = c("Feelings toward Immigrant Women", "Feelings toward Immigrants Men"))


# Create the combined plot following the sample structure
figure_j2j3 <- ggplot(marginal_effects, aes(y = predicted, x = factor(x), 
                                            color = factor(group), shape = factor(group), 
                                            linetype = outcome, group = interaction(group, outcome))) +
  geom_point(position = position_dodge(width = 1), size = 5, na.rm = TRUE) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), 
                 position = position_dodge(width = 1), linewidth = 1.1, na.rm = TRUE) +
  labs(x = "Experimental Conditions",
       y = "Predicted Values of American Women's Feelings",
       color = "Participant's Job Type",
       shape = "Participant's Job Type",
       linetype = "Outcome") + 
  scale_color_manual(values = c("#3CB371", "#191970"), 
                     labels = c("High-skilled Jobs", "Low-skilled Jobs")) +
  scale_shape_manual(values = c(16, 16), 
                     labels = c("High-skilled Jobs", "Low-skilled Jobs")) +
  scale_linetype_manual(values = c("solid", "dotted")) +
  facet_wrap(~ x, scales = "free_x", nrow = 1) +
  theme_bw() +
  theme(strip.text = element_text(size = 13, face = "bold"),
        axis.title.y = element_text(size = 13),
        axis.text.x = element_blank(),    
        axis.title.x = element_text(size = 13),
        axis.ticks.x = element_blank(),  
        legend.text = element_text(size = 13),
        legend.position = "bottom",
        legend.box = "vertical") + 
  guides(color = guide_legend(title = "Participant's Job Type"),
         shape = guide_legend(title = "Participant's Job Type"),
         linetype = guide_legend(title = "Outcome"))

print(figure_j2j3)


# ============================================================================
# APPENDIX J4 & J5: American Men's Feelings (Combined Figure)
# ============================================================================

# Run regression models
lm.us.w <- lm(ft_wom_r ~ conditions:job.lowskill:female +
              conditions + job.lowskill + female +
              inc_6_r + edu_6_r + age_6_r + pride_5_r + 
              econ_notsat_5_r + econ_own_notsat_5_r +
              race_5 + immi.first +
              job_conf_5_r + 
              pid_REP,
            data = replication_data)
lm.us.m <- lm(ft_men_r ~ conditions:job.lowskill:female +
                conditions + job.lowskill + female +
                inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                econ_notsat_5_r + econ_own_notsat_5_r +
                race_5 + immi.first +
                job_conf_5_r + 
                pid_REP,
              data = replication_data)

# Generate predicted values for immigrant women outcome
predictions_women <- ggpredict(lm.us.w, terms = c("conditions", "job.lowskill", "female")) %>%
  filter(facet == 0) %>%  # American men only
  mutate(outcome = "Feelings toward Immigrant Women") %>%
  filter(!is.na(x)) # Extra step to filter out NAs created by ggpredict()

# Generate predicted values for immigrant men outcome  
predictions_men <- ggpredict(lm.us.m, terms = c("conditions", "job.lowskill", "female")) %>%
  filter(facet == 0) %>%  # American men only
  mutate(outcome = "Feelings toward Immigrants Men") %>%
  filter(!is.na(x)) # Extra step to filter out NAs created by ggpredict()

# Combine predictions from both models for comparative visualization
marginal_effects <- bind_rows(predictions_women, predictions_men) 

# Set factor levels for proper ordering in plot
marginal_effects$x <- factor(marginal_effects$x, levels = c("Control", 
                                                            "Treat1. High-skilled Immigration", 
                                                            "Treat2. Low-skilled Immigration"))

# Set outcome factor levels to match the correct legend order
marginal_effects$outcome <- factor(marginal_effects$outcome, 
                                   levels = c("Feelings toward Immigrant Women", "Feelings toward Immigrants Men"))


# Create the combined plot following the sample structure
figure_j4j5 <- ggplot(marginal_effects, aes(y = predicted, x = factor(x), 
                                            color = factor(group), shape = factor(group), 
                                            linetype = outcome, group = interaction(group, outcome))) +
  geom_point(position = position_dodge(width = 1), size = 5, na.rm = TRUE) +
  geom_linerange(aes(ymin = conf.low, ymax = conf.high), 
                 position = position_dodge(width = 1), linewidth = 1.1, na.rm = TRUE) +
  labs(x = "Experimental Conditions",
       y = "Predicted Values of American Men's Feelings",
       color = "Participant's Job Type",
       shape = "Participant's Job Type",
       linetype = "Outcome") + 
  scale_color_manual(values = c("#3CB371", "#191970"), 
                     labels = c("High-skilled Jobs", "Low-skilled Jobs")) +
  scale_shape_manual(values = c(16, 16), 
                     labels = c("High-skilled Jobs", "Low-skilled Jobs")) +
  scale_linetype_manual(values = c("solid", "dotted")) +
  facet_wrap(~ x, scales = "free_x", nrow = 1) +
  theme_bw() +
  theme(strip.text = element_text(size = 13, face = "bold"),
        axis.title.y = element_text(size = 13),
        axis.text.x = element_blank(),    # Hide x-axis labels for cleaner appearance
        axis.title.x = element_text(size = 13),
        axis.ticks.x = element_blank(),   # Remove x-axis tick marks
        legend.text = element_text(size = 13),
        legend.position = "bottom",
        legend.box = "vertical") + 
  guides(color = guide_legend(title = "Participant's Job Type"),
         shape = guide_legend(title = "Participant's Job Type"),
         linetype = guide_legend(title = "Outcome"))

print(figure_j4j5)

# ============================================================================
# APPENDIX L4: Gender of Immigrant Thought Of
# ============================================================================

# Create subsets for each treatment condition
us_high <- replication_data %>% filter(conditions == "Treat1. High-skilled Immigration")
us_low <- replication_data %>% filter(conditions == "Treat2. Low-skilled Immigration")

# Calculate proportions
prop_high <- 100 * prop.table(table(us_high$immi_who_gender))
prop_low <- 100 * prop.table(table(us_low$immi_who_gender))

# Create data frame for plotting
df_img <- data.frame(
  category = c("Female", "Male", "Other", "Female", "Male", "Other"),
  proportion = c(prop_high["Female"], prop_high["Male"], prop_high["Other (Please specify)"], 
                 prop_low["Female"], prop_low["Male"], prop_low["Other (Please specify)"]),
  dataset = c(rep("Treat1. High-skilled Immigration", 3), rep("Treat2. Low-skilled Immigration", 3))
)

df_img$category <- factor(df_img$category, levels = c("Male", "Female", "Other"))
df_img$proportion_label <- sprintf("%.1f%%", df_img$proportion)

# Create the gender figure
fill_colors <- c("#fc8d62", "#66c2a5", "#8da0cb")  # Orange, Teal, Purple

figure_l4 <- ggplot(df_img, aes(x = dataset, y = proportion, fill = category)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  geom_text(aes(label = proportion_label),
            position = position_dodge(width = 0.7), vjust = -0.5, size = 3.5) +
  labs(x = "Experimental Conditions",
       y = "Percentage",
       fill = "Immigrant's Gender Thought") +
  scale_fill_manual(values = fill_colors) +
  theme_minimal() +
  theme(axis.title.y = element_text(size = 13),
        axis.text.x = element_text(size = 11, face = "bold"), 
        axis.title.x = element_text(size = 13), 
        legend.text = element_text(size = 13))

print(figure_l4)

# ============================================================================
# APPENDIX L5: Regional Origin of Immigrant Thought Of
# ============================================================================

# Calculate proportions
prop_high <- 100 * prop.table(table(us_high$immi_who_region))
prop_low <- 100 * prop.table(table(us_low$immi_who_region))

# Create data frames for each dataset
df_high <- data.frame(
  region = names(prop_high),
  proportion = as.numeric(prop_high),
  dataset = "Treat1. High-skilled Immigration"
)
df_low <- data.frame(
  region = names(prop_low),
  proportion = as.numeric(prop_low),
  dataset = "Treat2. Low-skilled Immigration"
)

# Manually change region names to match original
df_high$region <- c("Other", "Asia", "Europe", "Latin America", "Middle East")
df_low$region <- c("Other", "Asia", "Europe", "Latin America", "Middle East")

# Combine the data frames
df_combined <- rbind(df_high, df_low)
df_combined$region <- as.factor(df_combined$region)

# Calculate combined proportions for ordering
combined_proportions <- aggregate(proportion ~ region, data = df_combined, sum)
df_combined$region <- factor(df_combined$region, 
                             levels = combined_proportions$region[order(combined_proportions$proportion, decreasing = TRUE)])

# Define color map
color_map <- c(
  "Asia" = "#66c2a5",
  "Europe" = "#fc8d62",
  "Latin America" = "#8da0cb",
  "Middle East" = "#e78ac3",
  "Other" = "#a6d854"
)

# Format proportions
df_combined$proportion_label <- sprintf("%.1f%%", df_combined$proportion)

# Create the regional origin figure
figure_l5 <- ggplot(df_combined, aes(x = proportion, y = fct_reorder(region, proportion), fill = region)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.4) +
  geom_text(aes(label = proportion_label),
            position = position_dodge(width = 0.9), hjust = -0.1, 
            size = 3, fontface = "bold") +
  facet_wrap(~dataset, scales = "free_y") +
  labs(x = "Percentage",
       y = "Immigrant's Origin Thought") +
  scale_fill_manual(values = color_map) +
  theme_minimal() +
  theme(axis.title.y = element_text(size = 13),
        axis.text.y = element_text(size = 13), 
        axis.text.x = element_text(size = 13), 
        axis.title.x = element_text(size = 13),
        legend.position = "none",
        strip.text = element_text(size = 13, face = "bold"),
        plot.margin = margin(t = 1, r = 0.5, b = 1, l = 0.5, unit = "cm"))

print(figure_l5)

# ============================================================================
# APPENDIX L6: Regression Table for Immigrant Women Feelings
# ============================================================================

#Run the regression models
lm.us.w <- lm(ft_wom_r ~ conditions:job.lowskill:female +
                conditions + job.lowskill + female +
                inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                econ_notsat_5_r + econ_own_notsat_5_r +
                race_5 + immi.first +
                job_conf_5_r + 
                pid_REP,
              data = replication_data)
lm.us.w.base <- lm(ft_wom_r ~ 
                conditions + job.lowskill + female +
                inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                econ_notsat_5_r + econ_own_notsat_5_r +
                race_5 + immi.first +
                job_conf_5_r + 
                pid_REP,
              data = replication_data)

# Create regression table for immigrant women feelings
stargazer(lm.us.w.base, lm.us.w, 
          title = "",
          header = FALSE, 
          single.row = TRUE, 
          no.space = TRUE, 
          font.size = "small",
          dep.var.labels = "Feelings toward Immigrant Women",
          order = c("conditionsTreat2. Low-skilled Immigration:job.lowskill:female",
                    "conditionsTreat1. High-skilled Immigration:job.lowskill:female", 
                    "conditionsControl:job.lowskill:female",
                    "conditionsTreat2. Low-skilled Immigration",
                    "conditionsTreat1. High-skilled Immigration", 
                    "job.lowskill",
                    "female",
                    "inc_6_r",
                    "edu_6_r", 
                    "age_6_r",
                    "pride_5_r",
                    "econ_notsat_5_r",
                    "econ_own_notsat_5_r",
                    "race_5black",
                    "race_5latinx",
                    "race_5other", 
                    "race_5white",
                    "immi.first",
                    "job_conf_5_r",
                    "pid_REP"),
          covariate.labels = c("Treat2:Job(LowSkill):female",
                               "Treat1:Job(LowSkill):female",
                               "Control:Job(LowSkill):female", 
                               "Treat2(Low-skilled.Immi)",
                               "Treat1(High-skilled.Immi)",
                               "Participants.Job(LowSkill)",
                               "Female", 
                               "Income",
                               "Education",
                               "Age",
                               "Nationality.Pride",
                               "National.Economy.Dissatisfied", 
                               "Own.Finance.Dissatisfied",
                               "Race: Black",
                               "Race: Latinx",
                               "Race: Other",
                               "Race: White",
                               "First.Generation.Immigrant",
                               "Job.Security",
                               "pid.REP"),
          flip = TRUE)

# ============================================================================
# APPENDIX L7: Regression Table for Immigrant Men Feelings
# ============================================================================

#Run the regression models
lm.us.m <- lm(ft_men_r ~ conditions:job.lowskill:female +
                conditions + job.lowskill + female +
                inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                econ_notsat_5_r + econ_own_notsat_5_r +
                race_5 + immi.first +
                job_conf_5_r + 
                pid_REP,
              data = replication_data)
lm.us.m.base <- lm(ft_men_r ~ 
                     conditions + job.lowskill + female +
                     inc_6_r + edu_6_r + age_6_r + pride_5_r + 
                     econ_notsat_5_r + econ_own_notsat_5_r +
                     race_5 + immi.first +
                     job_conf_5_r + 
                     pid_REP,
                   data = replication_data)

# Create regression table for immigrant men feelings
stargazer(lm.us.m.base, lm.us.m, 
          title = "",
          header = FALSE, 
          single.row = TRUE, 
          no.space = TRUE, 
          font.size = "small",
          dep.var.labels = "Feelings toward Immigrant Men",
          order = c("conditionsTreat2. Low-skilled Immigration:job.lowskill:female",
                    "conditionsTreat1. High-skilled Immigration:job.lowskill:female", 
                    "conditionsControl:job.lowskill:female",
                    "conditionsTreat2. Low-skilled Immigration",
                    "conditionsTreat1. High-skilled Immigration", 
                    "job.lowskill",
                    "female",
                    "inc_6_r",
                    "edu_6_r", 
                    "age_6_r",
                    "pride_5_r",
                    "econ_notsat_5_r",
                    "econ_own_notsat_5_r",
                    "race_5black",
                    "race_5latinx",
                    "race_5other", 
                    "race_5white",
                    "immi.first",
                    "job_conf_5_r",
                    "pid_REP"),
          covariate.labels = c("Treat2:Job(LowSkill):female",
                               "Treat1:Job(LowSkill):female",
                               "Control:Job(LowSkill):female", 
                               "Treat2(Low-skilled.Immi)",
                               "Treat1(High-skilled.Immi)",
                               "Participants.Job(LowSkill)",
                               "Female", 
                               "Income",
                               "Education",
                               "Age",
                               "Nationality.Pride",
                               "National.Economy.Dissatisfied", 
                               "Own.Finance.Dissatisfied",
                               "Race: Black",
                               "Race: Latinx",
                               "Race: Other",
                               "Race: White",
                               "First.Generation.Immigrant",
                               "Job.Security",
                               "pid.REP"),
          flip = TRUE)
